package com.jlt.baidu;

import java.util.ArrayList;
import java.util.List;

import org.bytedeco.tesseract.TessBaseAPI;
import org.opencv.core.Core;
import org.opencv.core.Mat;
import org.opencv.core.MatOfPoint;
import org.opencv.core.MatOfPoint2f;
import org.opencv.core.Point;
import org.opencv.core.Rect;
import org.opencv.core.Scalar;
import org.opencv.core.Size;
import org.opencv.highgui.HighGui;
import org.opencv.imgproc.Imgproc;

import com.jlt.baidu.utils.Opencv420Util;

/**
 * Experimental helper for detecting table (grid) lines in an image with OpenCV and then
 * erasing or extracting the table regions.
 *
 * <p>The line-detection methods operate on a binarised image; the region methods take the
 * original image together with the combined line mask and the line-intersection ("joint")
 * image produced from it. See {@link #main(String[])} for the full pipeline.
 *
 * @author Ives.Chen
 */
public class ExcelLineUtil {

    /** Absolute path of the OpenCV 4.2.0 native library loaded by {@link #init(TessBaseAPI)}. */
    private static final String OPENCV_NATIVE_LIBRARY =
            "F:\\workSpace\\JLTMdmWorkSpace\\jlt-baidu\\src\\main\\resources\\opencv\\java\\x64\\opencv_java420.dll";

    /** Directory containing the Tesseract trained-data files. */
    private static final String TESSDATA_DIRECTORY =
            "F:\\workSpace\\JLTMdmWorkSpace\\jlt-baidu\\src\\main\\resources\\tessdata\\";

    /** Languages passed to Tesseract on initialisation. */
    private static final String TESSERACT_LANGUAGES = "chi_sim+eng";

    /** Path of the sample image used by {@link #main(String[])}, without the {@code .png} suffix. */
    private static final String SAMPLE_IMAGE_PREFIX = "F:\\workSpace\\OCR\\suyue\\predict\\2019-12-12_1";

    /** A region needs at least this many joint contours to be treated as a table. */
    private static final int MIN_TABLE_JOINTS = 4;

    /** Line-mask contours with a smaller area are treated as stray lines by {@link #singleExcelArea}. */
    private static final double MIN_TABLE_CONTOUR_AREA = 100;

    /**
     * Loads the OpenCV native library and initialises the given Tesseract handle.
     *
     * @param api Tesseract handle to initialise
     * @throws RuntimeException if Tesseract initialisation returns a non-zero status
     */
    public static void init(TessBaseAPI api) {
        System.load(OPENCV_NATIVE_LIBRARY);
        if (api.Init(TESSDATA_DIRECTORY, TESSERACT_LANGUAGES) != 0) {
            throw new RuntimeException("tesseract init fail");
        }
    }

    /**
     * Runs the table-detection pipeline on the sample image and writes every intermediate
     * result next to it.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        TessBaseAPI api = new TessBaseAPI();
        try {
            init(api);

            Mat source = Opencv420Util.imagerLoad(SAMPLE_IMAGE_PREFIX + ".png");

            // Grey-scale, then binarise (per the helper names; their exact behaviour lives in Opencv420Util).
            Mat gray = Opencv420Util.imageGray(source.clone());
            Mat binary = Opencv420Util.thresholdBlackGround(gray.clone());

            // Dilate then erode with a 3x3 kernel to fill small holes inside the table lines.
            Mat closingKernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, new Size(3, 3));
            Imgproc.dilate(binary, binary, closingKernel);
            Imgproc.erode(binary, binary, closingKernel);

            // Each extractor works in place, so hand it its own copy of the binarised image.
            Mat horizontalLine = getHorizontal(binary.clone());
            Opencv420Util.imageSave(SAMPLE_IMAGE_PREFIX + "_line1.png", horizontalLine);

            Mat verticalLine = getVertical(binary.clone());
            Opencv420Util.imageSave(SAMPLE_IMAGE_PREFIX + "_line.png", verticalLine);

            // Combined mask of all table lines.
            Mat tableLine = unionAllExcelLine(horizontalLine, verticalLine);
            Opencv420Util.imageSave(SAMPLE_IMAGE_PREFIX + "_all.png", tableLine);

            // Pixels present in both line images, i.e. where horizontal and vertical lines cross.
            Mat joints = new Mat();
            Core.bitwise_and(horizontalLine, verticalLine, joints);
            Opencv420Util.imageSave(SAMPLE_IMAGE_PREFIX + "_bet.png", joints);

            // Alternatives for this last step: dropNoInExcelArea(...) or singleExcelArea(...).
            Mat withoutTables = dropExcelArea(source.clone(), tableLine, joints);
            Opencv420Util.imageSave(SAMPLE_IMAGE_PREFIX + "_Split.png", withoutTables);
        } finally {
            // Release the native Tesseract resources held by the handle.
            api.End();
        }
    }

    /**
     * Detects line segments with Canny + probabilistic Hough transform, draws them in green
     * onto {@code src} and shows the result in a HighGui window until a key is pressed.
     *
     * @param src  image to analyse; the detected segments are drawn onto it
     * @param gray receives the Canny edge map
     */
    public static void houghLines(Mat src, Mat gray) {
        // 1. Edge detection.
        Imgproc.Canny(src, gray, 100, 200);

        // 2. Probabilistic Hough transform with 1 px distance resolution and 1 degree angular
        //    resolution. The angle is expressed in radians, hence Math.PI / 180.
        Mat lines = new Mat();
        Imgproc.HoughLinesP(gray, lines, 1, Math.PI / 180.0, 1, 0, 0);

        Scalar green = new Scalar(0, 255, 0);
        for (int i = 0, len = lines.rows(); i < len; i++) {
            // Each row holds one segment as (x1, y1, x2, y2).
            double[] segment = lines.get(i, 0);
            Point start = new Point((int) segment[0], (int) segment[1]);
            Point end = new Point((int) segment[2], (int) segment[3]);
            Imgproc.line(src, start, end, green, 2, Imgproc.LINE_AA);
        }
        HighGui.imshow("直线检测", src);
        HighGui.waitKey(0);
    }

    /**
     * Keeps only the horizontal table lines of a binarised image.
     *
     * <p>The image is eroded and then dilated with a one-pixel-high rectangular kernel whose
     * width is 1/100 of the image width (at least 1). The operation is performed in place.
     *
     * @param adaptiveThreshold binarised image; modified in place
     * @return the same {@code adaptiveThreshold} instance
     */
    public static Mat getHorizontal(Mat adaptiveThreshold) {
        int scale = 100;
        // Clamp to 1 so that images narrower than `scale` pixels do not yield a zero-width
        // kernel size (same guard as getVertical).
        int horizontalSize = Math.max(1, adaptiveThreshold.cols() / scale);
        return morphologicalOpen(adaptiveThreshold, new Size(horizontalSize, 1));
    }

    /**
     * Keeps only the vertical table lines of a binarised image.
     *
     * <p>The image is eroded and then dilated with a one-pixel-wide rectangular kernel whose
     * height is 1/50 of the image height (at least 1). The operation is performed in place.
     *
     * @param adaptiveThreshold binarised image; modified in place
     * @return the same {@code adaptiveThreshold} instance
     */
    public static Mat getVertical(Mat adaptiveThreshold) {
        int scale = 50;
        int verticalSize = Math.max(1, adaptiveThreshold.rows() / scale);
        return morphologicalOpen(adaptiveThreshold, new Size(1, verticalSize));
    }

    /**
     * Merges the horizontal and vertical line images into one mask using a bitwise OR.
     *
     * @param horizontal horizontal-line image
     * @param vertical   vertical-line image
     * @return a new image containing the lines of both inputs
     */
    public static Mat unionAllExcelLine(Mat horizontal, Mat vertical) {
        Mat merged = new Mat();
        Core.bitwise_or(horizontal, vertical, merged);
        return merged;
    }

    /**
     * Erases the table regions from a copy of the original image by painting a filled white
     * rectangle over the bounding box of every detected table.
     *
     * <p>A region is an outer contour of {@code mask_image}; it counts as a table when its
     * bounding box contains at least four joint contours in {@code points_image}.
     *
     * @param srcImage     original image; not modified
     * @param mask_image   combined horizontal + vertical line mask
     * @param points_image line-intersection image
     * @return a copy of {@code srcImage} with the table regions covered in white
     */
    public static Mat dropExcelArea(Mat srcImage, Mat mask_image, Mat points_image) {
        return coverRegions(srcImage, mask_image, points_image, true);
    }

    /**
     * Erases the non-table line regions from a copy of the original image: every outer contour
     * of {@code mask_image} whose bounding box contains fewer than four joint contours is
     * covered with a filled white rectangle. Regions that qualify as tables are left untouched.
     *
     * @param srcImage     original image; not modified
     * @param mask_image   combined horizontal + vertical line mask
     * @param points_image line-intersection image
     * @return a copy of {@code srcImage} with the non-table line regions covered in white
     */
    public static Mat dropNoInExcelArea(Mat srcImage, Mat mask_image, Mat points_image) {
        return coverRegions(srcImage, mask_image, points_image, false);
    }

    /**
     * Cuts every detected table out of the original image.
     *
     * <p>Outer contours of {@code mask_image} with an area below 100 are skipped as stray
     * lines; of the rest, those whose bounding box contains at least four joint contours in
     * {@code points_image} are copied out of {@code srcImage}.
     *
     * @param srcImage     original image; not modified
     * @param mask_image   combined horizontal + vertical line mask
     * @param points_image line-intersection image
     * @return independent copies of the table regions, in contour order; empty if none found
     */
    public static List<Mat> singleExcelArea(Mat srcImage, Mat mask_image, Mat points_image) {
        List<Mat> tables = new ArrayList<Mat>();
        for (MatOfPoint contour : findOuterContours(mask_image)) {
            if (Imgproc.contourArea(contour) < MIN_TABLE_CONTOUR_AREA) {
                continue;
            }
            Rect bounds = Imgproc.boundingRect(contour);
            if (countJoints(points_image, bounds) < MIN_TABLE_JOINTS) {
                continue;
            }
            tables.add(srcImage.submat(bounds).clone());
        }
        return tables;
    }

    /** Erodes and then dilates {@code image} in place with a rectangular kernel of the given size. */
    private static Mat morphologicalOpen(Mat image, Size kernelSize) {
        Mat kernel = Imgproc.getStructuringElement(Imgproc.MORPH_RECT, kernelSize);
        Imgproc.erode(image, image, kernel);
        Imgproc.dilate(image, image, kernel);
        return image;
    }

    /** Returns the outermost contours found in the line mask. */
    private static List<MatOfPoint> findOuterContours(Mat lineMask) {
        List<MatOfPoint> contours = new ArrayList<MatOfPoint>();
        Mat hierarchy = new Mat();
        Imgproc.findContours(lineMask, contours, hierarchy, Imgproc.RETR_EXTERNAL, Imgproc.CHAIN_APPROX_SIMPLE,
                new Point(0, 0));
        return contours;
    }

    /** Counts the contours of the joint image that lie inside {@code region}. */
    private static int countJoints(Mat jointsImage, Rect region) {
        List<MatOfPoint> jointContours = new ArrayList<MatOfPoint>();
        Mat hierarchy = new Mat();
        Imgproc.findContours(jointsImage.submat(region), jointContours, hierarchy, Imgproc.RETR_CCOMP,
                Imgproc.CHAIN_APPROX_SIMPLE);
        return jointContours.size();
    }

    /**
     * Paints a filled white rectangle over the bounding box of each line-mask region that is
     * (when {@code coverTables} is true) or is not (when false) a table, on a copy of the source.
     */
    private static Mat coverRegions(Mat srcImage, Mat lineMask, Mat jointsImage, boolean coverTables) {
        Mat result = srcImage.clone();
        Scalar white = new Scalar(255, 255, 255);
        for (MatOfPoint contour : findOuterContours(lineMask)) {
            // The bounding box is taken from the raw contour. An earlier polygon approximation
            // step wrote into a discarded temporary and never influenced this box.
            Rect bounds = Imgproc.boundingRect(contour);
            boolean isTable = countJoints(jointsImage, bounds) >= MIN_TABLE_JOINTS;
            if (isTable == coverTables) {
                // thickness -1 = filled, lineType 4, shift 0
                Imgproc.rectangle(result, bounds.tl(), bounds.br(), white, -1, 4, 0);
            }
        }
        return result;
    }
}
